clear
set more off


use "${wd}/nlrb/nlrb7799_02.dta",clear


drop  maxdate closed_date election_date sic ///
	num idnlrb var* fiscalyear elect_id ///
	region type docket size_cat vote_los* union_l1* union_l2* ///
	duplic union_partic union_incumb rectype union_w_code address ///
	union_code casenum localnum pet_type employer unionname dups* ///
	checker temp tot year_clos month_clos year_elect month_elect recent_elect 


/////*****************Method for fixing missing county codes*****************/////

// Missing data

drop if cntycode=="" | cntycode=="000" ///
	| cntycode=="-89" | cntycode=="-97" 
	
// 1029 observations with problematic county codes
keep nlrb_id state1 cnt*

duplicates drop nlrb_id cntycode state1,force

duplicates tag nlrb_id,g(dups)
drop if dups>0

merge 1:m nlrb_id using "${temp}/nlrb_representation_1961_1998.dta"
keep if _merge == 3
drop _merge

order sta* cn* cou*

keep state1 state cntycode cntytext county_name

// Identify relevant counties that need to be recoded
// Ones that are matched to 1961-1998
preserve

	use "${wd}/nlrb/nlrb7799_02.dta",clear


	drop  maxdate closed_date election_date sic ///
		num idnlrb var* fiscalyear elect_id ///
		region type docket size_cat vote_los* union_l1* union_l2* ///
		duplic union_partic union_incumb rectype union_w_code address ///
		union_code casenum cntytext localnum pet_type employer unionname dups* ///
		checker temp tot year_clos month_clos year_elect month_elect recent_elect 


	ren cntycode unit_location_county

	keep if unit_location_county=="" | unit_location_county=="000" ///
		| unit_location_county=="-89" | unit_location_county=="-97" 
		
	// 1029 observations with problematic county codes
	duplicates drop nlrb_id,force

	keep nlrb_id

	merge 1:m nlrb_id using "${temp}/nlrb_representation_1961_1998.dta"
	keep if _merge==3

	keep county_name
	duplicates drop county_name,force

	save "${temp}/counties_to_recode.dta",replace

restore

merge m:1 county_name using "${temp}/counties_to_recode.dta"
keep if _merge == 3
drop _merge



// Recode states
g num = 1
collapse (rawsum) num ,by(state1 state cntycode cntytext county_name)



replace state1 = "AL" if inlist(state1,"ALL","ALA")
replace state1 = "AR" if inlist(state1,"ARK")
replace state1 = "AK" if inlist(state1,"ALK")
replace state1 = "AZ" if inlist(state1,"ARI")
replace state1 = "CA" if inlist(state1,"CAL","CA-")
replace state1 = "CO" if inlist(state1,"COL")
replace state1 = "CT" if inlist(state1,"CON")
replace state1 = "DE" if inlist(state1,"DEL")
replace state1 = "FL" if inlist(state1,"FLA")
replace state1 = "HI" if inlist(state1,"HAW")
replace state1 = "ID" if inlist(state1,"IDA")
replace state1 = "ID" if inlist(state1,"IDO")
replace state1 = "IL" if inlist(state1,"ILL")
replace state1 = "IN" if inlist(state1,"IND")
replace state1 = "IA" if inlist(state1,"IOW")
replace state1 = "KS" if inlist(state1,"KS","KAN")
replace state1 = "MA" if inlist(state1,"MAS")
replace state1 = "MI" if inlist(state1,"MIC")
replace state1 = "MN" if inlist(state1,"MIN")
replace state1 = "MS" if inlist(state1,"MIS")
replace state1 = "MT" if inlist(state1,"MON")
replace state1 = "NE" if inlist(state1,"NEB")
replace state1 = "NV" if inlist(state1,"NEV")
replace state1 = "OH" if inlist(state1,"OHI","OH&")
replace state1 = "OK" if inlist(state1,"OKL")
replace state1 = "OR" if inlist(state1,"ORE")
replace state1 = "PA" if inlist(state1,"PEN")
replace state1 = "TN" if inlist(state1,"TEN")
replace state1 = "TX" if inlist(state1,"TEX")
replace state1 = "UT" if inlist(state1,"UTA")
replace state1 = "VT" if inlist(state1,"VER")
replace state1 = "WA" if inlist(state1,"WAS","W&S")
replace state1 = "WI" if inlist(state1,"WIS")
replace state1 = "WV" if inlist(state1,"WVA")
replace state1 = "WY" if inlist(state1,"WYO")


g state_text = ""
replace state_text ="AL" if state == 	1
replace state_text ="AK" if state == 	2
replace state_text ="AZ" if state == 	4
replace state_text ="AR" if state == 	5
replace state_text ="CA" if state == 	6
replace state_text ="CO" if state == 	8
replace state_text ="CT" if state == 	9
replace state_text ="DE" if state == 	10
replace state_text ="DC" if state == 	11
replace state_text ="FL" if state == 	12
replace state_text ="GA" if state == 	13
replace state_text ="HI" if state == 	15
replace state_text ="ID" if state == 	16
replace state_text ="IL" if state == 	17
replace state_text ="IN" if state == 	18
replace state_text ="IA" if state == 	19
replace state_text ="KS" if state == 	20
replace state_text ="KY" if state == 	21
replace state_text ="LA" if state == 	22
replace state_text ="ME" if state == 	23
replace state_text ="MD" if state == 	24
replace state_text ="MA" if state == 	25
replace state_text ="MI" if state == 	26
replace state_text ="MN" if state == 	27
replace state_text ="MS" if state == 	28
replace state_text ="MO" if state == 	29
replace state_text ="MT" if state == 	30
replace state_text ="NE" if state == 	31
replace state_text ="NV" if state == 	32
replace state_text ="NH" if state == 	33
replace state_text ="NJ" if state == 	34
replace state_text ="NM" if state == 	35
replace state_text ="NY" if state == 	36
replace state_text ="NC" if state == 	37
replace state_text ="ND" if state == 	38
replace state_text ="OH" if state == 	39
replace state_text ="OK" if state == 	40
replace state_text ="OR" if state == 	41
replace state_text ="PA" if state == 	42
replace state_text ="RI" if state == 	44
replace state_text ="SC" if state == 	45
replace state_text ="SD" if state == 	46
replace state_text ="TN" if state == 	47
replace state_text ="TX" if state == 	48
replace state_text ="UT" if state == 	49
replace state_text ="VT" if state == 	50
replace state_text ="VA" if state == 	51
replace state_text ="WA" if state == 	53
replace state_text ="WV" if state == 	54
replace state_text ="WI" if state == 	55
replace state_text ="WY" if state == 	56

g check = 1 if state_text != state1 
sort check

sum num if check == 1
disp `r(sum)'		// 3349
sum num if check != 1
disp `r(sum)' // 94589

drop if state_text != state1 // 1623


collapse (rawsum) num ,by(state1 state cntycode cntytext county_name)

// Get fraction of overlap
bys state1 cntycode: egen tot1 = total(num)
bys state1 cntycode county_name: egen tot2 = total(num)

g fraction = tot2/tot1

keep if fraction>=.25 // problem at this point

// Get fraction in other direction
bys state1 county_name: egen tot3 = total(num)
bys state1 cntycode county_name: egen tot4 = total(num)

g fraction2 = tot4/tot3

keep if fraction>=.25 & fraction2 >=.25 // problem at this stage

gsort county_name - num

duplicates tag state1 county_name,g(dups)

sort dups
g blank = 1 if cntytext == ""
bys state1 cntycode: egen check = total(blank)

sort dups check county_name

drop if dups >0 & blank == 1 & check == 1
drop blank check

drop dups
duplicates tag county_name,g(dups)

duplicates tag county_name state cntycode,g(dups2)

bys county_name state cntycode: egen tot5 = total(num)
bys county_name state cntycode cntytext: egen tot6 = total(num)

g fraction3 = tot6/tot5

sort dups county_name state cntycode

keep if fraction3>=.25

drop dups*
duplicates tag county_name, g(dups)
tab dups

sort dups county_name


// identify incorrect states

decode county_name, g(county_name_str)
g statecheck = substr(county_name_str,1,2)

drop if statecheck != state1

duplicates drop cntycode county_name state1,force


drop dups*
duplicates tag county_name, g(dups)
tab dups

preserve
	keep if dups >0
	drop dups
	
	tostring state,g(statestr)
	replace statestr="0"+statestr if length(statestr)==1 

	g county=statestr+cntycode
	destring county,replace

	g city=.
	do "${nlrb}/county_city1973_1980"
	ren city city3

	g city =.
	do "${nlrb}/county_city1981_1989"
	ren city city4

	g city =.
	do "${nlrb}/county_city1990_1992"
	ren city city5

	g city =.
	do "${nlrb}/county_city1990_1992"
	ren city city6

	g city =.
	do "${nlrb}/county_city_cbsa"
	ren city city7

	order cntycode county_name* city*
	sort county_name
	
	// Identify counties which lead to same city definition in every year
	drop if county_name == 450396  & cntycode == "163"	
	drop if county_name == 450399  & cntycode == "129"	
	drop if county_name == 758476  & cntycode == "111"	
	drop if county_name == 929104  & cntycode == "019"	
	drop if county_name == 1101842  & cntycode == "109"	
	drop if county_name == 1101859  & cntycode == "113"	
	drop if county_name == 1135861  & cntycode == "005"	
	drop if county_name == 1384028  & cntycode == "485"	
	drop if county_name == 1497573  & cntycode == "045"	
	drop if county_name == 1581104  & cntycode == "087"	
	drop if county_name == 1581130  & cntycode == "073"	
	drop if county_name == 1622975  & cntycode == "037"	
	drop if county_name == 1718426  & cntycode == "063"	

	// Base on size
	drop if county_name == 306914  & cntycode == "053"	
	drop if county_name == 357428  & cntycode == "063"	
	drop if county_name == 450356  & cntycode == "127"	
	drop if county_name == 450387  & cntycode == "053"
	drop if county_name == 1074071  & cntycode == "091"
	drop if county_name == 1209186  & cntycode == "097"
	
	// remaining based on county text names
	drop if county_name == 974151  & cntycode == "117"
	drop if county_name == 1074020  & cntycode == "101"
	drop if county_name == 1074020  & cntycode == "107"
	
	// One cannot separate
	drop if county_name == 1497573  
	
	duplicates tag county_name,g(dups)
	tab dups
	drop dups
	
	save "${temp}/issues.dta",replace
restore

drop if dups > 0

append using "${temp}/issues.dta"

cap drop dups
duplicates tag county_name,g(dups)
tab dups
drop dups

keep state1 state cnty* county_name

save "${temp}/county_merge.dta",replace















